# -*- coding: utf-8 -*-
"""
Created on Fri Apr 15 14:48:44 2016

@author: ppradeep
"""
#path = 'W:/Rapid Tox/'
path = 'C:/Users/ppradeep/Desktop/Rapid Tox/'

## Exploring distribution of ER binding activity for hindered phenols based on different properties - MOE, Physchem etc.
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

## Global properties: box plots and t-test statistics.
# Load the descriptor table, indexed by its CERAPP_ID column.
f0 = pd.read_csv(
    path+'ReadAcross-Project/cerapp_moe.txt',
    sep=',',
    encoding='latin1',
    index_col='CERAPP_ID',
)

# Every column from the third one onward is treated as a property to test.
properties = f0.columns[2:]

# Row masks for the two observed binding classes (0 and 1), built once
# instead of re-filtering the frame for every property.
is_class0 = f0['Observed_class_binding'] == 0
is_class1 = f0['Observed_class_binding'] == 1

# Property name -> p-value (second element of the ttest_ind result) of an
# independent two-sample t-test between the class-0 and class-1 values.
p_val = {
    name: ttest_ind(f0.loc[is_class0, name], f0.loc[is_class1, name])[1]
    for name in properties
}
    
def _save_binding_boxplot(frame, column, p_text, text_y, y_label, out_file):
    """Draw one property box plot grouped by binding class and save it.

    The three figures produced by this script were built from copy-pasted
    stanzas that differed only in the values passed here.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding ``column`` and ``'Observed_class_binding'``.
    column : str
        Name of the property column to plot.
    p_text : str
        Annotation text (mathtext allowed) drawn inside the axes.
    text_y : float
        Y data coordinate of the annotation; its x coordinate is fixed at 1.2.
    y_label : str
        Label for the y axis.
    out_file : str
        Destination path handed to ``savefig``.

    Returns
    -------
    The Axes object returned by ``DataFrame.boxplot``.
    """
    ax = frame.boxplot(column=column, by='Observed_class_binding')
    fig = ax.get_figure()
    ax.text(1.2, text_y, p_text, fontsize=14)
    # pandas adds both an axes title and a 'Boxplot grouped by ...' figure
    # title automatically; blank them out.
    ax.set_title("")
    fig.suptitle("")
    # Box positions 1 and 2 are relabelled in place of the raw class values.
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Non-binder', 'Binder'])
    ax.tick_params(axis='x', labelsize=22, pad=15)
    ax.set_ylabel(y_label, fontsize=22)
    ax.set_xlabel("", fontsize=22)  # suppress the 'Observed_class_binding' label
    fig.savefig(out_file, bbox_inches='tight')
    return ax


# NOTE(review): the P-value strings below are hard-coded literals; they are not
# read from the p_val dictionary computed earlier in this script, so confirm
# they still match whenever the input data changes.
ax = _save_binding_boxplot(
    f0, 'a_donacc', r'P-value: $6.89 \times 10^{-4}$', 30.5,
    "H-bond donors & acceptors",
    path+'ReadAcross-Project/Cut-Off/don_acc.png',
)

ax = _save_binding_boxplot(
    f0, 'logP(o/w)', r'P-value: $9.79 \times 10^{-18}$', 12.2,
    "LogP",
    path+'ReadAcross-Project/Cut-Off/logp.png',
)

ax = _save_binding_boxplot(
    f0, 'vol', r'P-value: $9.95 \times 10^{-4}$', 710,
    "Molecular Volume",
    path+'ReadAcross-Project/Cut-Off/vol.png',
)

#%%
## R group properties. Boxplots and T-Test statistics.

import csv
from scipy.stats import ttest_ind

# Box plots of every property column (fourth column onward) for the r2 and r3
# R-group tables, grouped by observed binding class.
for r_num in range(2,4):
    # Forward slashes throughout: the previous literal used backslashes in a
    # non-raw string, and '\R' is an invalid escape sequence (a warning in
    # current Python, slated to become an error). This also matches the path
    # spelling used for the same directory elsewhere in this script.
    f0 = pd.read_csv('W:/Rapid Tox/ReadAcross-Project/R-groupAnalysis/r%d.txt' %r_num, sep = ',', encoding = 'latin1', index_col = 'CERAPP_ID')
    for col in f0.columns[3:]:
        f0.boxplot(column=col, by = 'Observed_class_binding')


# Run an independent two-sample t-test (binding class 0 vs. class 1) for every
# property column of the r1..r12 R-group tables and collect the results in a
# single CSV with one row per (R-group, property) pair.
#
# The output file is managed by a `with` block so it is closed even if reading
# a table or running a test raises. newline='' is what the csv module requires
# of the file object; without it an extra blank line follows every row on
# Windows.
with open('W:/Rapid Tox/ReadAcross-Project/R-groupAnalysis/RgrpPropTTest.csv', 'w', newline='') as n:
    writeCSV = csv.writer(n)
    writeCSV.writerow(['R-group', 'Property' , 't-value', 'p-value'])

    for r_num in range(1,13):
        f = pd.read_csv('W:/Rapid Tox/ReadAcross-Project/R-groupAnalysis/r%d.txt' %r_num, sep = ',', encoding = 'latin1', index_col = 'CERAPP_ID')
        # Columns from the fourth one onward are the properties to test.
        properties = f.columns[3:]
        for prop in properties:
            x0 = f[f['Observed_class_binding'] == 0][prop]
            x1 = f[f['Observed_class_binding'] == 1][prop]
            t, p = ttest_ind(x0, x1)
            writeCSV.writerow(['R%d' %r_num, prop, t, p])